#! /opt/local/bin/ruby -w

# convert_koi_ipa_to_utf.rb
# MovaX
#
# Created by Andrei Popov on 7/25/06.
# Copyright 2006 Andrei Popov (andrei@ceesaxp.org). All rights reserved.

# This script converts the KOI8-encoded Mueller dictionary to a Unicode (UTF-8) one, preserving the IPA pronunciation symbols.  Results are dumped into an XML file with each record structured as:
# <dictionaryRecord>
#   <term>word</term>
#   <pronounciation>word</pronounciation>
#   <definitions>
#     <definition order="1" partOfSpeech="noun">word definition and examples follow</definition>
#     <definition order="2" partOfSpeech="adjective">word definition and examples follow</definition>
#   </definitions>
# </dictionaryRecord>
#
require "iconv"

# Converter from KOI8-R to UTF-8 (Iconv.new takes the target encoding first,
# then the source encoding).
# NOTE(review): nothing in the visible source references this variable -- confirm
# whether it is still needed. Iconv was also removed from the standard library in
# Ruby 2.0 (String#encode is the replacement), so this line needs the iconv gem
# on modern interpreters.
convertKOI2UTF = Iconv.new("UTF-8", "KOI8-R")

# Converts text encoded for the legacy 8-bit SIL IPA93 font into Unicode (UTF-8).
#
# Each input byte is looked up in a table that maps it to one or more Unicode
# code points; the resulting code points are packed into a UTF-8 string.
class IPAConvert
  # Builds the byte -> code point lookup table.
  #
  # Keys are the SIL IPA93 byte value formatted as "0xNN" (upper-case hex);
  # values are one or more space-separated Unicode code points in the same
  # notation.
  def initialize
    # Based on siliap93.xml from http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=IPAhome
    #
    # 0x22 used to be listed twice ("0x0069", then "0x0131"). Ruby keeps the
    # last duplicate key in a hash literal, so only "0x0131" was ever in
    # effect; the shadowed entry has been dropped to silence the -w warning.
    @silIpaUnicodeMap = {
      "0x20" => "0x0020", "0x21" => "0x030B", "0x22" => "0x0131",
      "0x23" => "0x0304", "0x24" => "0x0300", "0x25" => "0x030F", "0x26" => "0x030C",
      "0x27" => "0x02BC", "0x28" => "0x0306", "0x29" => "0x0303", "0x2A" => "0x030A",
      "0x2B" => "0x031F", "0x2C" => "0x002C", "0x2D" => "0x0324", "0x2E" => "0x002E",
      "0x2F" => "0x002F", "0x30" => "0x0330", "0x31" => "0x0318", "0x32" => "0x0319",
      "0x33" => "0x031D", "0x34" => "0x031E", "0x35" => "0x032A", "0x36" => "0x033B",
      "0x37" => "0x031C", "0x38" => "0x0325", "0x39" => "0x032F", "0x3A" => "0x02E1",
      "0x3B" => "0x029F", "0x3C" => "0x207F", "0x3D" => "0x0320", "0x3E" => "0x02D1",
      "0x3F" => "0x0294", "0x40" => "0x0301", "0x41" => "0x0251", "0x42" => "0x03B2",
      "0x44" => "0x00F0", "0x45" => "0x025B", "0x46" => "0x0264", "0x47" => "0x0262",
      "0x48" => "0x02B0", "0x49" => "0x026A", "0x4A" => "0x02B2", "0x4B" => "0x029C",
      "0x4C" => "0x026E", "0x4D" => "0x0271", "0x4E" => "0x014B", "0x4F" => "0x00F8",
      "0x50" => "0x0275", "0x51" => "0x00E6", "0x52" => "0x027E", "0x53" => "0x0283",
      "0x54" => "0x03B8", "0x55" => "0x028A", "0x56" => "0x028B", "0x57" => "0x02B7",
      "0x58" => "0x03C7", "0x59" => "0x028F", "0x5A" => "0x0292", "0x5B" => "0x005B",
      "0x5C" => "0x005C", "0x5D" => "0x005D", "0x5E" => "0x0302", "0x5F" => "0x0308",
      "0x60" => "0x0329", "0x61" => "0x0061", "0x62" => "0x0062", "0x63" => "0x0063",
      "0x64" => "0x0064", "0x65" => "0x0065", "0x66" => "0x0066", "0x67" => "0x0261",
      "0x68" => "0x0068", "0x69" => "0x0069", "0x6A" => "0x006A", "0x6B" => "0x006B",
      "0x6C" => "0x006C", "0x6D" => "0x006D", "0x6E" => "0x006E", "0x6F" => "0x006F",
      "0x70" => "0x0070", "0x71" => "0x0071", "0x72" => "0x0072", "0x73" => "0x0073",
      "0x74" => "0x0074", "0x75" => "0x0075", "0x76" => "0x0076", "0x77" => "0x0077",
      "0x78" => "0x0078", "0x79" => "0x0079", "0x7A" => "0x007A", "0x7B" => "0x0280",
      "0x7C" => "0x031A", "0x7D" => "0x027D", "0x7E" => "0x033D", "0x7F" => "0x007F",
      "0x81" => "0x0252", "0x82" => "0x0258", "0x83" => "0x0361", "0x84" => "0x2016",
      "0x87" => "0x0298", "0x88" => "0x030B", "0x89" => "0x030B", "0x8A" => "0x02E5",
      "0x8B" => "0x2191", "0x8C" => "0x0250", "0x8D" => "0x0254", "0x8E" => "0x01C0",
      "0x8F" => "0x0301", "0x90" => "0x0301", "0x91" => "0x02E6", "0x92" => "0x01C1",
      "0x93" => "0x0304", "0x94" => "0x0304", "0x95" => "0x02E7", "0x96" => "0x007C",
      "0x97" => "0x01C3", "0x98" => "0x0300", "0x99" => "0x0300", "0x9A" => "0x02E8",
      "0x9B" => "0x2193", "0x9C" => "0x01C2", "0x9D" => "0x030F", "0x9E" => "0x030F",
      "0x9F" => "0x02E9", "0xA1" => "0x030A", "0xA2" => "0x031E", "0xA3" => "0x031D",
      "0xA4" => "0x032C", "0xA5" => "0x0325", "0xA6" => "0x0339", "0xA7" => "0x0282",
      "0xA8" => "0x0279", "0xA9" => "0x0260", "0xAA" => "0x0319", "0xAB" => "0x0259",
      "0xAC" => "0x0289", "0xAD" => "0x0320", "0xAE" => "0x0268", "0xAF" => "0x0276",
      "0xB0" => "0x033A", "0xB1" => "0x031F", "0xB2" => "0x0274", "0xB3" => "0x02E4",
      "0xB4" => "0x028E", "0xB5" => "0x026F", "0xB8" => "0x0278", "0xB9" => "0x02A2",
      "0xBA" => "0x0253", "0xBB" => "0x032F", "0xBC" => "0x0330", "0xBD" => "0x0290",
      "0xBE" => "0x006A", "0xBF" => "0x0153", "0xC0" => "0x0295", "0xC1" => "0x0318",
      "0xC2" => "0x026C", "0xC3" => "0x028C", "0xC4" => "0x0263", "0xC6" => "0x029D",
      "0xC7" => "0x02CC", "0xC8" => "0x02C8", "0xC9" => "0x1D50", "0xCA" => "0x200A",
      "0xCB" => "0xF181", "0xCC" => "0x2197", "0xCD" => "0x2198", "0xCE" => "0x025C",
      "0xCF" => "0x025E", "0xD0" => "0x0324", "0xD1" => "0x033C", "0xD2" => "0x0281",
      "0xD3" => "0x027B", "0xD4" => "0x1D51", "0xD5" => "0x02DE", "0xD6" => "0x002D",
      "0xD7" => "0x0284", "0xDA" => "0x030B", "0xDB" => "0x0301", "0xDC" => "0x0304",
      "0xDD" => "0x0300", "0xDE" => "0x030F", "0xDF" => "0x0302", "0xE0" => "0x030C",
      "0xE1" => "0x0306", "0xE2" => "0x0303", "0xE3" => "0x028D", "0xE4" => "0x027A",
      "0xE5" => "0x0270", "0xE6" => "0x0302", "0xE7" => "0x0265", "0xE9" => "0x0302",
      "0xEA" => "0x0256", "0xEB" => "0x0257", "0xEC" => "0x02E0", "0xED" => "0x203F",
      "0xEE" => "0x0267", "0xEF" => "0x025F", "0xF0" => "0x0127", "0xF1" => "0x026D",
      "0xF3" => "0x030C", "0xF4" => "0x030C", "0xF5" => "0x0299", "0xF6" => "0x0268",
      "0xF7" => "0x0273", "0xF8" => "0x0272", "0xF9" => "0x02D0", "0xFA" => "0x0266",
      "0xFB" => "0x02A1", "0xFC" => "0x0291", "0xFD" => "0x029B", "0xFE" => "0x0255",
      "0xFF" => "0x0288",
      # the following codes map to a sequence of two code points
      "0x43" => "0x0063 0x0327", "0x80" => "0x02E9 0x02E7", "0x85" => "0x02E5 0x02E7",
      "0x86" => "0x02E5 0x02E9", "0xD8" => "0x02E7 0x02E5", "0xD9" => "0x02E7 0x02E9",
      "0xE8" => "0x02E9 0x02E5"
    }
  end

  # Converts a SIL IPA93 encoded string to Unicode (UTF-8).
  #
  # s - the String to convert; it is processed byte by byte.
  #
  # Bytes that have no entry in the table (control characters such as "\n" or
  # "\t", and a handful of unassigned high bytes) are passed through as the
  # code point with the same numeric value instead of raising NoMethodError
  # on a nil lookup.
  #
  # Returns a new UTF-8 String; an empty input yields an empty String.
  def convert(s)
    codepoints = []
    s.each_byte do |b|
      mapped = @silIpaUnicodeMap["0x%02X" % b]
      if mapped
        # One table entry may expand to several code points.
        mapped.split.each { |code| codepoints << code.hex }
      else
        codepoints << b
      end
    end
    # "U*" packs every element as a UTF-8 character, so no manual count is needed.
    codepoints.pack("U*")
  end
end

# Manual smoke test: reads standard input line by line, converts each line
# from SIL IPA93 to UTF-8 with IPAConvert#convert and prints the result.
def myTest
  ipa = IPAConvert.new
  $stdin.each_line do |line|
    # Strip the line terminator before converting: the SIL IPA93 table starts
    # at 0x20, so "\n" / "\r\n" is not part of the text to convert. puts
    # writes the newline back out.
    puts ipa.convert(line.chomp)
  end
end

# Script entry point: runs the stdin-driven conversion as soon as the file is executed.
myTest
